{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "## Notebook for comparing tests\n",
    "### Testing distributions\n",
    "\n",
    "This notebook is trying to compare different drift tests and highlight each strength. Every test is useful! We just have to use it in a fitted scenario!"
   ],
   "metadata": {
    "id": "QztD6LMKrfn7"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "Import libraries"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from scipy import stats\n",
    "\n",
    "from evidently.calculations.stattests import StatTest\n",
    "from evidently.test_suite import TestSuite\n",
    "from evidently.tests import TestColumnDrift\n",
    "\n",
    "from plotly import graph_objs as go\n",
    "import plotly.express as px"
   ],
   "outputs": [],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "yPH0tLabtNpZ",
    "outputId": "d109922a-9eb8-4918-d01c-874e9cc58dc1"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "warnings.simplefilter('ignore')"
   ],
   "outputs": [],
   "metadata": {
    "id": "rlj43ygQtjGg"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Prepare Datasets/Distributions"
   ],
   "metadata": {
    "id": "L8YdHdn9rmcg"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "Now we are going to define four distributions with different types of drifts for the two samples"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "#function that will help us define sample and control group\n",
    "\n",
    "def give_me_smp_cntr_df(sample1,sample2):\n",
    "    \"\"\"\n",
    "    It receives two arrays of the produced sample distributions and\n",
    "    returns two dataframes that have the sample and control groups to test later the drift\n",
    "    \"\"\"\n",
    "    sample_df = pd.DataFrame(np.array([sample1,sample2]).T,columns=['sample_group','control_group'])\n",
    "    #initial dataset\n",
    "    smp_df=sample_df['sample_group'].reset_index().rename(columns={'sample_group': \"test_group\"})\n",
    "    #control dataset\n",
    "    cntr_df=sample_df['control_group'].reset_index().rename(columns={'control_group': \"test_group\"})\n",
    "    return smp_df,cntr_df\n"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# General gamma distribution\n",
    "\n",
    "a, c = 3, -1.02\n",
    "#defining sample 1\n",
    "r1 = stats.gengamma.rvs(a, c, size=1000)\n",
    "\n",
    "a, c = 3, -1.32\n",
    "#defining sample 2\n",
    "r2 = stats.gengamma.rvs(a, c, size=1000)\n",
    "\n",
    "smp_df,cntr_df = give_me_smp_cntr_df(r1,r2)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Normal distribution\n",
    "\n",
    "mu, sigma = 0, 0.08 # mean and standard deviation\n",
    "normal = np.random.normal(mu, sigma, 1000)\n",
    "\n",
    "mu, sigma = 0, 0.05 # mean and standard deviation\n",
    "normal2 = np.random.normal(mu, sigma, 1000)\n",
    "\n",
    "smp_df2,cntr_df2 = give_me_smp_cntr_df(normal,normal2)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Discrete bionmal\n",
    "\n",
    "n=10\n",
    "p=0.8\n",
    "\n",
    "data_binom = stats.binom.rvs(10,0.8,size=1000)\n",
    "data_binom2 = stats.binom.rvs(10,0.75,size=1000)\n",
    "\n",
    "smp_df3,cntr_df3 = give_me_smp_cntr_df(data_binom,data_binom2)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Discrete poisson\n",
    "\n",
    "mu=1.5\n",
    "data_poisson = stats.poisson.rvs(mu=1.5, size=2000)\n",
    "data_poisson2 = stats.poisson.rvs(mu=2, size=2000)\n",
    "\n",
    "smp_df4,cntr_df4 = give_me_smp_cntr_df(data_poisson,data_poisson2)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Define custom tests"
   ],
   "metadata": {
    "id": "It_sFjB1scv6"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "Here we are defining custom test."
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "First define Mann-Whitney U-rank\n",
    "\n",
    "sources:"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "from scipy.stats import mannwhitneyu\n",
    "\n",
    "def mannwhitneyu_rank(\n",
    "    reference_data: pd.Series,\n",
    "    current_data: pd.Series,\n",
    "    feature_type: str,\n",
    "    threshold: float,\n",
    "    use_continuity: bool = True\n",
    "):\n",
    "    \"\"\"Calculate the Mann-Whitney U-rank test between two arrays\n",
    "    Args:\n",
    "        reference_data: reference data\n",
    "        current_data: current data\n",
    "        feature_type: feature type\n",
    "        threshold: all values above this threshold means data drift\n",
    "        use_continuity: take or not into account continuity correction\n",
    "    Returns:\n",
    "        pvalue: the p-value for the test depending on alternative and method\n",
    "        test_result: whether the drift is detected\n",
    "    \"\"\"\n",
    "    wil_p_value = mannwhitneyu(x=reference_data, y=current_data,use_continuity=use_continuity)[1]\n",
    "    return wil_p_value, wil_p_value < threshold\n",
    "\n",
    "\n",
    "mann_whitney_u_stat_test = StatTest(\n",
    "    name=\"mannw\",\n",
    "    display_name=\"Mann-Whitney U-rank test\",\n",
    "    func=mannwhitneyu_rank,\n",
    "    allowed_feature_types=[\"num\"],\n",
    "    default_threshold=0.05\n",
    ")"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "from scipy.stats import epps_singleton_2samp\n",
    "\n",
    "def _epps_singleton(\n",
    "    reference_data: pd.Series,\n",
    "    current_data: pd.Series,\n",
    "    feature_type: str,\n",
    "    threshold: float):\n",
    "    \"\"\"Run the Epps-Singleton (ES) test of two samples.\n",
    "    Args:\n",
    "        reference_data: reference data\n",
    "        current_data: current data\n",
    "        threshold: level of significance (default will be 0.05)\n",
    "    Returns:\n",
    "        p_value: p-value based on the asymptotic chi2-distribution.\n",
    "        test_result: whether the drift is detected\n",
    "    \"\"\"\n",
    "    p_value = epps_singleton_2samp(reference_data, current_data)[1]\n",
    "    return p_value, p_value < threshold\n",
    "\n",
    "\n",
    "epps_singleton_test = StatTest(\n",
    "    name=\"es\",\n",
    "    display_name=\"Epps-Singleton\",\n",
    "    func=_epps_singleton,\n",
    "    allowed_feature_types=[\"num\"],\n",
    "    default_threshold=0.05\n",
    ")"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "feature = 'test_group'\n",
    "\n",
    "data_drift_dataset_tests = TestSuite(tests=[\n",
    "    TestColumnDrift(column_name=feature, stattest=mann_whitney_u_stat_test),\n",
    "    TestColumnDrift(column_name=feature, stattest=epps_singleton_test),\n",
    "    TestColumnDrift(column_name=feature, stattest='ks'),\n",
    "    TestColumnDrift(column_name=feature, stattest='anderson'),\n",
    "    TestColumnDrift(column_name=feature, stattest='cramer_von_mises')\n",
    "])"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Define function for checking p-values per population\n",
    "\n",
    "def create_test_result_dataset(data_drift_dataset_tests):\n",
    "    d = []\n",
    "\n",
    "    for tests in data_drift_dataset_tests.as_dict()['tests']:\n",
    "        d2 = []\n",
    "        d2.append(tests['parameters']['features']['test_group']['stattest_name'])\n",
    "        d2.append(tests['parameters']['features']['test_group']['score'])\n",
    "\n",
    "        #added the test name and drift score(p-value or distance)\n",
    "        d.append(d2)\n",
    "\n",
    "    df = pd.DataFrame(d, columns = ['test','p-value'])\n",
    "\n",
    "    return df"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Run tests"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Poisson distribution\n",
    "fig = go.Figure()\n",
    "fig.add_trace(go.Histogram(x=data_poisson, nbinsx=40, name='data_poisson'))\n",
    "fig.add_trace(go.Histogram(x=data_poisson2, nbinsx=40, name='data_poisson2'))\n",
    "\n",
    "fig.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Poisson distribution\n",
    "df_n = pd.DataFrame()\n",
    "\n",
    "for n in range(100,1100,100):\n",
    "    data_drift_dataset_tests.run(reference_data = smp_df4[0:n], current_data = cntr_df4[0:n])\n",
    "    df = create_test_result_dataset(data_drift_dataset_tests)\n",
    "    df['data_length'] = n\n",
    "    df_n = pd.concat([df_n, df])"
   ],
   "outputs": [],
   "metadata": {
    "scrolled": true
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Poisson distribution\n",
    "fig = px.line(\n",
    "    df_n.reset_index(), \n",
    "    x=\"data_length\", \n",
    "    y=\"p-value\", \n",
    "    color=\"test\")\n",
    "\n",
    "fig.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "\"When comparing the incomes of two different groups (especially groups that span the socioeconomic\n",
    "spectrum), the distributions will likely be highly variable and highly skewed. In such a case,\n",
    "it might be better to use a nonparametric test like Wilcoxon’s signed-rank test.\"\n",
    "\n",
    "\"This is a paired test that compares the medians of two distributions\"\n",
    "\n",
    "Of course for this case the Mann-Whitney U test is similar to the Wilcoxon test, but can be used to compare\n",
    "multiple samples that aren’t necessarily paired.\n",
    "\n",
    "source: https://www.mit.edu/~6.s085/notes/lecture5.pdf 5.1.3 Wilcoxon’s signed-rank test"
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "Anderson and cramer von mises perform also good at this case"
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "Let's see another case of discrete population drift:"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "#Binomal distribution\n",
    "fig = go.Figure()\n",
    "fig.add_trace(go.Histogram(x=data_binom, nbinsx=40, name='data_binom'))\n",
    "fig.add_trace(go.Histogram(x=data_binom2, nbinsx=40, name='data_binom2'))\n",
    "\n",
    "fig.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Binomal distribution\n",
    "df_n=pd.DataFrame()\n",
    "\n",
    "for n in range(100,1100,100):\n",
    "    \n",
    "    data_drift_dataset_tests.run(reference_data=smp_df3[0:n], current_data=cntr_df3[0:n])\n",
    "    df = create_test_result_dataset(data_drift_dataset_tests)\n",
    "    df['data_length'] = n\n",
    "    df_n=pd.concat([df_n, df])"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "#Binomal distribution\n",
    "fig = px.line(\n",
    "    df_n.reset_index(), \n",
    "    x=\"data_length\", \n",
    "    y=\"p-value\", \n",
    "    color=\"test\")\n",
    "\n",
    "fig.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "Again, KS seems to be slower to realize that the two distributions are different."
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "But wait, when is the Mann-Whitney U actually not good at detecting drifts? and KS (as well other tests) better?\n",
    "\n",
    "Mann-Whitney U mentions that uses medians to do the tests. So lets try with two normal distributions"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Normal distribution\n",
    "fig = go.Figure()\n",
    "fig.add_trace(go.Histogram(x=normal, nbinsx=40, name='normal'))\n",
    "fig.add_trace(go.Histogram(x=normal2, nbinsx=40, name='normal2'))\n",
    "\n",
    "fig.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "#Normal distribution\n",
    "df_n=pd.DataFrame()\n",
    "\n",
    "for n in range(100,1100,100):\n",
    "    \n",
    "    data_drift_dataset_tests.run(reference_data=smp_df2[0:n], current_data=cntr_df2[0:n])\n",
    "    df = create_test_result_dataset(data_drift_dataset_tests)\n",
    "    df['data_length'] = n\n",
    "    df_n=pd.concat([df_n, df])"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Normal distribution\n",
    "fig = px.line(\n",
    "    df_n.reset_index(), \n",
    "    x=\"data_length\", \n",
    "    y=\"p-value\", \n",
    "    color=\"test\")\n",
    "\n",
    "fig.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "As you see here the Mann-Whitney U test never converges to a low p-value as others. It uses medians to find the differences however, here we have a different sigma as the main difference. Thus, at such type of drift, Mann-Whitney U will not fail.\n",
    "\n",
    "In this short example you saw how a statistical test based on its strength points or weak can detect or not detect drift.\n",
    "\n",
    "Every test is good for specific cases.\n",
    "\n",
    "## So choose wisely!!"
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "Want to plug and play?\n",
    "1) Define your population\n",
    "2) Run the tests\n",
    "3) Select the test based on drift detection plot  "
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Your distribution\n",
    "a, c = 3, -1.02\n",
    "mean, var, skew, kurt = stats.gengamma.stats(a, c, moments='mvsk')\n",
    "your_r = stats.gengamma.rvs(a, c, size=2000)\n",
    "\n",
    "a, c = 2.5, -1.02\n",
    "mean, var, skew, kurt = stats.gengamma.stats(a, c, moments='mvsk')\n",
    "your_r2 = stats.gengamma.rvs(a, c, size=2000)\n",
    "\n",
    "print(mean, var, skew, kurt)\n",
    "\n",
    "smp_df,cntr_df = give_me_smp_cntr_df(your_r,your_r2)\n"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Your distribution\n",
    "fig = go.Figure()\n",
    "fig.add_trace(go.Histogram(x=your_r, nbinsx=40, name='your_r'))\n",
    "fig.add_trace(go.Histogram(x=your_r2, nbinsx=40, name='your_r2'))\n",
    "\n",
    "fig.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# gen gamma\n",
    "df_n=pd.DataFrame()\n",
    "\n",
    "for n in range(100,2100,100):\n",
    "    \n",
    "    data_drift_dataset_tests.run(reference_data=smp_df[0:n], current_data=cntr_df[0:n])\n",
    "    df = create_test_result_dataset(data_drift_dataset_tests)\n",
    "    df['data_length'] = n\n",
    "    df_n=pd.concat([df_n, df])"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Your distribution\n",
    "fig = px.line(\n",
    "    df_n.reset_index(), \n",
    "    x=\"data_length\", \n",
    "    y=\"p-value\", \n",
    "    color=\"test\")\n",
    "\n",
    "fig.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Support Evidently\n",
    "Enjoyed the tutorial? Star Evidently on GitHub to contribute back! This helps us continue creating free open-source tools for the community. https://github.com/evidentlyai/evidently"
   ],
   "metadata": {}
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "stat_test_specification_for_data_drift_adult.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  },
  "vscode": {
   "interpreter": {
    "hash": "18cc34fa94e0a1282f5044615c7e7a010c96937046ea65cf7b032836bab201fd"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}